library(Matrix)
library(Matrix.utils)
library(tidyverse)
## ── Attaching core tidyverse packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::discard() masks scales::discard()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(Seurat)
## Attaching SeuratObject
library(dplyr)
library(knitr)
library(ggsci)
library(RCurl)
## Warning: package 'RCurl' was built under R version 4.2.3
##
## Attaching package: 'RCurl'
##
## The following object is masked from 'package:tidyr':
##
## complete
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(SingleR)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
##
## The following object is masked from 'package:dplyr':
##
## count
##
##
## Attaching package: 'MatrixGenerics'
##
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
##
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
##
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
##
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
##
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
##
## The following objects are masked from 'package:lubridate':
##
## second, second<-
##
## The following objects are masked from 'package:dplyr':
##
## first, rename
##
## The following object is masked from 'package:tidyr':
##
## expand
##
## The following objects are masked from 'package:Matrix':
##
## expand, unname
##
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
##
## The following object is masked from 'package:lubridate':
##
## %within%
##
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## The following object is masked from 'package:purrr':
##
## reduce
##
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
##
## Attaching package: 'Biobase'
##
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
##
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
##
## Attaching package: 'SummarizedExperiment'
##
## The following object is masked from 'package:SeuratObject':
##
## Assays
##
## The following object is masked from 'package:Seurat':
##
## Assays
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(multtest)
library(clustree)
## Loading required package: ggraph
library(limma)
##
## Attaching package: 'limma'
##
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
#Quality control metrices
load("~/work/PI3K/data/SeuratObjects/merged/Mouse/merged_seurat_mouse.RData")
merged_filtered_seurat <- subset(merged_seurat, subset = Sample_Tag != 'NA' & Sample_Tag != 'Undetermined' & Sample_Tag != 'SampleTag01_hs' & Sample_Tag != 'Multiplet')
merged_filtered_seurat$nCount_RNA <- NULL
merged_filtered_seurat$nFeature_RNA <- NULL
merged_filtered_seurat_meta <- merged_filtered_seurat@meta.data
subset_control <- subset(merged_filtered_seurat_meta, Sample_Tag == "Control" | Sample_Tag == "Control_tagged")
subset_alpelisib <- subset(merged_filtered_seurat_meta, Sample_Tag == "Alpelisib")
subset_copanlisib <- subset(merged_filtered_seurat_meta, Sample_Tag == "Copanlisib")
#cell count
subset_control %>%
ggplot(aes(x=Xenograft, fill=Xenograft)) +
geom_bar() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells from the untreated xenografts")+
scale_fill_lancet()
The figures are showing different quality metrices for the control samples.
# Extract identity and sample information from dataframe to determine the number of cells per sample_tag
n_cells_control<- table(subset_control$Sample_Tag)
# View table
kable(n_cells_control,
col.names = c("Sample_Tag",
"nCells"),
caption = "The table shows the number of cells which were untreated.")
| Sample_Tag | nCells |
|---|---|
| Control | 17095 |
#UMI count
subset_control %>%
ggplot(aes(color=Xenograft, x=nUMI, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
ylab("Cell density") +
geom_vline(xintercept = 500)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("nUMi from the control xenografts")+
scale_fill_lancet()
The figures are showing different quality metrices for the control samples.
#Gene Count
subset_control %>%
ggplot(aes(color=Xenograft, x=nGene, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
geom_vline(xintercept = 300)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("nGene from the control xenografts")+
scale_fill_lancet()
The figures are showing different quality metrices for the control samples.
#joint filtering effect
subset_control %>%
ggplot(aes(x=nUMI, y=nGene)) +
geom_point() +
scale_colour_gradient(low = "gray90", high = "black") +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 250) +
facet_wrap(~Sample_Tag)+
scale_fill_lancet()
## `geom_smooth()` using formula = 'y ~ x'
The figures are showing different quality metrices for the control samples.
#Mitochondrial count
subset_control %>%
ggplot(aes(color=Xenograft, x=percent.mt, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
geom_vline(xintercept = 2)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("Mitochondrial gene expression in [%] \n from the control xenografts")+
scale_fill_lancet()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 24 rows containing non-finite values (`stat_density()`).
The figures are showing different quality metrices for the control samples.
#cell count
subset_copanlisib %>%
ggplot(aes(x=Xenograft, fill=Xenograft)) +
geom_bar() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells from the xenografts treated with Copanlisib")+
scale_fill_lancet()
The figures are showing different quality metrices for the copanlisib samples.
# Extract identity and sample information from dataframe to determine the number of cells per sample_tag
n_cells_copanlisib<- table(subset_copanlisib$Sample_Tag)
# View table
kable(n_cells_copanlisib,
col.names = c("Sample_Tag",
"nCells"),
caption = "The table shows the number of cells which were treated with Copanlisib.")
| Sample_Tag | nCells |
|---|---|
| Copanlisib | 3382 |
#UMI count
subset_copanlisib %>%
ggplot(aes(color=Xenograft, x=nUMI, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
ylab("Cell density") +
geom_vline(xintercept = 500)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("nUMI from the xenografts treated with Copanlisib")+
scale_fill_lancet()
The figures are showing different quality metrices for the copanlisib samples.
#Gene Count
subset_copanlisib %>%
ggplot(aes(color=Xenograft, x=nGene, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
geom_vline(xintercept = 300)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("nGene from the xenografts treated with Copanlisib")+
scale_fill_lancet()
The figures are showing different quality metrices for the copanlisib samples.
#joint filtering effect
subset_copanlisib %>%
ggplot(aes(x=nUMI, y=nGene)) +
geom_point() +
scale_colour_gradient(low = "gray90", high = "black") +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 250) +
facet_wrap(~Sample_Tag)+
scale_fill_lancet()
## `geom_smooth()` using formula = 'y ~ x'
The figures are showing different quality metrices for the copanlisib samples.
#Mitochondrial count
subset_copanlisib %>%
ggplot(aes(color=Xenograft, x=percent.mt, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
geom_vline(xintercept = 2)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("Mitochondrial gene expression in [%] from \n the xenografts treated with Copanlisib")+
scale_fill_lancet()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 4 rows containing non-finite values (`stat_density()`).
The figures are showing different quality metrices for the copanlisib samples.
#cell count
subset_alpelisib %>%
ggplot(aes(x=Xenograft, fill=Xenograft)) +
geom_bar() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells from the xenografts treated with Alpelisib")+
scale_fill_lancet()
The figures are showing different quality metrices for the alpelisib samples.
# Extract identity and sample information from dataframe to determine the number of cells per sample_tag
n_cells_alp<- table(subset_alpelisib$Sample_Tag)
# View table
kable(n_cells_alp,
col.names = c("Sample_Tag",
"nCells"),
caption = "The table shows the number of cells which were treated with Alpelisib.")
| Sample_Tag | nCells |
|---|---|
| Alpelisib | 21531 |
#UMI count
subset_alpelisib %>%
ggplot(aes(color=Xenograft, x=nUMI, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
ylab("Cell density") +
geom_vline(xintercept = 500)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("nUMI from the xenografts treated with Alpelisib")+
scale_fill_lancet()
The figures are showing different quality metrices for the alpelisib samples.
#Gene Count
subset_alpelisib %>%
ggplot(aes(color=Xenograft, x=nGene, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
geom_vline(xintercept = 300)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("nGene from the xenografts treated with Alpelisib")+
scale_fill_lancet()
The figures are showing different quality metrices for the alpelisib samples.
#joint filtering effect
subset_alpelisib %>%
ggplot(aes(x=nUMI, y=nGene)) +
geom_point() +
scale_colour_gradient(low = "gray90", high = "black") +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 250) +
facet_wrap(~Sample_Tag)+
scale_fill_lancet()
## `geom_smooth()` using formula = 'y ~ x'
The figures are showing different quality metrices for the alpelisib samples.
#Mitochondrial count
subset_alpelisib %>%
ggplot(aes(color=Xenograft, x=percent.mt, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
geom_vline(xintercept = 2)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("Mitochondrial gene expression in [%] from \n the xenografts treated with Alpelisib")+
scale_fill_lancet()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 12 rows containing non-finite values (`stat_density()`).
The figures are showing different quality metrices for the alpelisib samples.
merged_filtered_seurat_meta %>%
ggplot(aes(x=Xenograft, fill=nUMI)) +
geom_bar() +
labs(x="Xenografts", y="Number of cells",
title="Overview of the number of cells by xenografts and condition") +
facet_grid(~Sample_Tag) +
theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) +
scale_fill_lancet()
## Warning: The following aesthetics were dropped during statistical transformation: fill
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?
## The following aesthetics were dropped during statistical transformation: fill
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?
## The following aesthetics were dropped during statistical transformation: fill
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?
#Investigation of unwanted variations - cell cycle phases and mitochondrial gene expression
load("~/work/PI3K/data/SeuratObjects/filtered/Mouse/filtered_seurat.RData")
filtered_seurat$nCount_RNA <- NULL
filtered_seurat$nFeature_RNA <- NULL
filtered_seurat$orig.ident <- NULL
load("~/work/PI3K/data/SuplementaryFiles/SeuratObjects/cycle.rda")
filtered_phase_seurat <- CellCycleScoring(filtered_seurat,
g2m.features = g2m_genes,
s.features = s_genes)
## Warning: The following features are not present in the object: UBR7, RFC2,
## RAD51, MCM2, TIPIN, MCM6, UNG, POLD3, WDR76, CLSPN, CDC45, CDC6, MSH2, MCM5,
## POLA1, MCM4, RAD51AP1, GMNN, RPA2, CASP8AP2, HELLS, E2F8, GINS2, PCNA, NASP,
## BRIP1, DSCC1, DTL, CDCA7, CENPU, ATAD2, CHAF1B, USP1, SLBP, RRM1, FEN1, RRM2,
## EXO1, CCNE2, TYMS, BLM, PRIM1, UHRF1, not searching for symbol synonyms
## Warning: The following features are not present in the object: NCAPD2, ANLN,
## TACC3, HMMR, GTSE1, NDC80, AURKA, TPX2, BIRC5, G2E3, CBX5, RANGAP1, CTCF,
## CDCA3, TTK, SMC4, ECT2, CENPA, CDC20, NEK2, CENPF, TMPO, HJURP, CKS2, DLGAP5,
## PIMREG, TOP2A, PSRC1, CDCA8, CKAP2, NUSAP1, KIF23, KIF11, KIF20B, CENPE,
## GAS2L3, KIF2C, NUF2, ANP32E, LBR, MKI67, CCNB2, CDC25C, HMGB2, CKAP2L, BUB1,
## CDK1, CKS1B, UBE2C, CKAP5, AURKB, CDCA2, TUBB4B, JPT1, not searching for symbol
## synonyms
## Warning in AddModuleScore(object = object, features = features, name = name, :
## Could not find enough features in the object from the following feature lists:
## S.Score Attempting to match case...Could not find enough features in the object
## from the following feature lists: G2M.Score Attempting to match case...
filtered_phase_seurat <- FindVariableFeatures(filtered_phase_seurat,
selection.method = "vst",
nfeatures = 2000,
verbose = FALSE)
filtered_phase_seurat <- ScaleData(filtered_phase_seurat)
## Centering and scaling data matrix
filtered_phase_seurat <- RunPCA(filtered_phase_seurat)
## PC_ 1
## Positive: Cd69, Gm26870, Olfm4, Gm9733, Retnlg, Ifnb1, Cxcr1, Cd209a, Ly6g, Il23a
## Prok2, Cd209f, Vsig4, Hba-ps3, Calm4, Usp17lb, Ifnl2, Siglech, Klri2, Hba-a2
## Cd5l, Bcl2l14, Cd209g, Alas2, Ear-ps5, Slc40a1, Cma2, Ngp, Chad, Tcrg-C4
## Negative: Rpl36a, Rps16-ps2, Rps24-ps3, Rps10-ps1, Gm19810, Rps19-ps6, Rps3a3, Rps10-ps3, Gm6025, Gm46197
## Gm2225, Gm10095, Gm41073, Gm29228, Gm6136, Gm6341, Gm46317, CT025573.1, Gm6109, Gm15896
## Rpl26-ps6, Gm5528, Gm44010, Gm7424, Gm10086, Gm14044, Gm8186, Gm19688, Hnrnpa1, Gm7565
## PC_ 2
## Positive: Sparc, Nedd4, Fstl1, Col1a2, Col1a1, Fkbp9, Col3a1, Col5a2, Crtap, Loxl1
## Serping1, Bmp1, Serpinh1, Calu, Col5a1, Dcn, Fbn1, Rnase4, Mmp2, Bgn
## Pcolce, Adamts2, Fkbp7, Rcn1, Ppic, Serpinf1, Gstm2, Ikbip, Aebp1, Rcn3
## Negative: Gm4468, Gm46197, Gm46317, Gm29228, Gm6341, Gm15896, Gm44010, Gm6025, CT025573.1, Gm41073
## Gm10095, Rps10-ps3, Gm12739, Rpl26-ps6, Gm2178, Gm5528, Gm9687, Gm14044, Gm13692, AC110534.4
## Gm14874, Gm45499, Rpl31-ps9, Gm12788, Rps19-ps6, Rpl12-ps2, Gm5928, Gm11416, AC159898.1, Gm7351
## PC_ 3
## Positive: Pltp, Ctss, Tgfbi, Cybb, Lyz2, Pfn1, Ms4a6c, Ctsc, Ms4a7, Aif1
## Msr1, Cd38, C1qb, Dab2, Stab1, F13a1, C1qc, Cd72, Saa3, Ass1
## Maf, C1qa, Sdc4, Ehd4, Prdx1, Ly86, Cd74, Fabp5, Apoe, Arg1
## Negative: Col14a1, Dpt, Clec3b, Efemp1, Gm45860, Ogn, Ly6c1, Tnxb, Ly6a, Rpl17-ps4
## Ackr3, Dpp4, Gm7285, Scara5, Adamts5, Lrrn4cl, Adgrd1, Nid1, Itih5, Rpl31-ps9
## C1s1, Lgi2, Dcn, Fndc1, Gm4468, Col3a1, Fstl1, Gm45791, Rpl12-ps2, Gm5928
## PC_ 4
## Positive: Col12a1, Wisp1, Ncam1, Spon1, Lrrc15, C1qtnf3, Cdh11, Mical2, Postn, Thbs2
## Hs6st2, Col8a1, Dkk3, Adam12, Ltbp2, Kif26b, Wnt5a, Tagln, Unc5b, Gpr176
## Col5a2, Cald1, Adamts12, Tnc, Rflnb, Tpm2, Pmepa1, Olfml3, Col6a3, Bace1
## Negative: Cd34, Ly6c1, Clec3b, Tnxb, Efemp1, Ly6a, Dpt, Scara5, Dpp4, Ackr3
## Col14a1, Plpp3, Ogn, Cadm3, Cd55, Lrrn4cl, Gas7, Tgfbr2, Adamts5, Igfbp6
## Slfn5, Heg1, Sema3c, Efhd1, Adgrd1, Gsn, Itih5, Klf4, Scara3, Uap1
## PC_ 5
## Positive: Esam, Adgrf5, Mmrn2, Flt1, Ptprb, Pdlim1, Tspan7, Plvap, Tie1, Cdh5
## Egfl7, Col4a1, Ramp2, Emcn, Col4a2, Gimap4, Aqp1, Spns2, C130074G19Rik, Col15a1
## Myct1, Sox18, Palmd, Ece1, Cyyr1, Clec14a, Crip2, Gimap6, Erg, Ablim1
## Negative: Ctss, Ms4a6c, Ms4a7, Aif1, Cybb, Pltp, Ctsc, Lyz2, Msr1, C1qc
## C1qb, Tgfbi, C1qa, Ly86, Dab2, Cd72, F13a1, Sdc4, Mmp14, Saa3
## Cd38, Lgals3bp, Maf, Rgs1, Cd74, Apoe, Gatm, Cx3cr1, Runx3, Stab1
DimPlot(filtered_phase_seurat,
reduction = "pca",
group.by= "Phase",
split.by = "Phase")+
ggtitle("PCA visualization of the cell cycle phases")+
scale_fill_lancet()
filtered_phase_seurat <- RunUMAP(filtered_phase_seurat,
dims = 1:40,
reduction = "pca")
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 17:41:08 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:41:08 Read 31748 rows and found 40 numeric columns
## 17:41:08 Using Annoy for neighbor search, n_neighbors = 30
## 17:41:08 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:41:16 Writing NN index file to temp file /tmp/Rtmpc4EXGv/filec5c4c7c1c29c5
## 17:41:16 Searching Annoy index using 1 thread, search_k = 3000
## 17:41:25 Annoy recall = 75.37%
## 17:41:26 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:41:28 Initializing from normalized Laplacian + noise (using irlba)
## 17:41:31 Commencing optimization for 200 epochs, with 1606488 positive edges
## 17:41:51 Optimization finished
DimPlot(filtered_phase_seurat,
reduction = "umap",
split.by = "Phase",
group.by = "Sample_Tag") +
ggtitle("UMAP splitted by cell cycle phase \n and grouped by conditions") +
scale_fill_lancet()
FeaturePlot(filtered_phase_seurat,
features = "percent.mt")+
ggtitle("Mitochondrial gene expression distribution in clusters")+
scale_fill_lancet()
#Integration
integrated_filtered_sct_cond_seurat <- readRDS("~/work/PI3K/data/SeuratObjects/integrated/Mouse/integrated_filtered_sct_cond_seurat.rds")
p1 <- DimPlot(integrated_filtered_sct_cond_seurat, reduction = "umap", group.by = "Sample_Tag") +
labs(title = "UMAP visualisation of the condition distribution") +
scale_fill_lancet()
p2 <- DimPlot(integrated_filtered_sct_cond_seurat, reduction = "umap", label = TRUE, repel = TRUE)+
labs(title = "UMAP visualisation of the number of clusters") +
scale_fill_lancet()
p3 <- DimPlot(integrated_filtered_sct_cond_seurat, reduction = "umap", group.by = "Xenograft")+
labs(title = "UMAP visualisation of the xenograft distribution") +
scale_fill_lancet()
p1
p2
p3
#Clustering analysis
seurat_integrated <- readRDS("~/work/PI3K/data/SeuratObjects/clusteranalysis/Mouse/seurat_integrated_newidents.rds")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.1"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6) +
labs(title = "UMAP visualisation shows the number of clusters \n for resolution 1.0") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.0.2"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 0.2") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.0.4"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 0.4") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.0.6"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 0.6") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.0.8"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 0.8") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.1.4"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 1.4") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.1.6"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 1.6") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.1.8"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 1.8") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.2"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 2.0") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.2.4"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 2.4") +
scale_fill_lancet(name="ClusterIDs")
#Celltype identification singleR
seurat_integrated_renameidents <- readRDS("~/work/PI3K/data/SeuratObjects/celltypeIdentification/Mouse/SingleR/seurat_integrated_singleR.rds")
DimPlot(object = seurat_integrated_renameidents,
reduction = "umap",
label = TRUE,
label.size = 3,
repel = TRUE) +
labs(title = "UMAP visualisation shows the different celltypes \n for the resolution 0.4") +
scale_fill_lancet(name="Celltypes")
n_cells0.4.long <- readRDS("~/work/PI3K/data/SeuratObjects/celltypeIdentification/Mouse/SingleR/n_cells0.4_mouse.rds")
plt_0.4 <- ggplot(n_cells0.4.long,aes(ClusterID, singlr_labels)) +
geom_tile(aes(fill=Occurances)) +
geom_text(aes(label = round(Occurances, 1)), size=2) +
scale_fill_gradient(low = "white", high = "red") +
labs(y="Celltypes")
plt_0.4
#Label comparison
df_merged <- readRDS("~/work/PI3K/data/SeuratObjects/celltypeIdentification/df_merged_singleR_labels.rds")
plt_merged <- ggplot(df_merged,aes(x = singlr_labels.human, y= singlr_labels.mouse)) +
geom_tile(aes(fill=Cell_Index_count)) +
geom_text(aes(label = Cell_Index_count), size=1.5) +
scale_fill_gradient(low = "white", high = "red") +
labs(x="Celltypes_human", y="Celltypes_mouse") +
theme(axis.text.x = element_text(angle=90))
plt_merged
#Transcript analysis
umi_counts_merged <- read_delim("~/work/PI3K/data/SuplementaryFiles/CSVFiles/umi_count_merged.csv")
## Rows: 34609 Columns: 4
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Cell_Index, Labels
## dbl (2): UMIs_human, UMIs_mouse
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#create scatter plot
sp <- ggplot(umi_counts_merged, aes(x = UMIs_human, y = UMIs_mouse, fill=Labels)) + geom_point(aes(colour = Labels))
sp + geom_abline() +
xlab("Transcripts human") +
ylab("Transcripts mouse")
#Celltype identification demultiplexed
seurat_integrated <- readRDS("~/work/PI3K/data/SeuratObjects/cellfiltering_umi/Mouse/seurat_integrated_with_spec_labels_mouse.rds")
seurat_integrated_filtered_mouse <- readRDS("~/work/PI3K/data/SeuratObjects/demultiplexed/Mouse/seurat_integrated_filtered_mouse.rds")
UMAP_mouse <- DimPlot(seurat_integrated_filtered_mouse, reduction = "umap", group.by = "Labels", cols = c("red", "grey", "lightgrey")) + ggtitle("UMAP mouse cells")
UMAP_mouse
seurat_integrated_filtered_mouse <- RunUMAP(seurat_integrated_filtered_mouse, dims = 1:30)
## 17:44:24 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:44:24 Read 31748 rows and found 30 numeric columns
## 17:44:24 Using Annoy for neighbor search, n_neighbors = 30
## 17:44:24 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:44:27 Writing NN index file to temp file /tmp/Rtmpc4EXGv/filec5c4c569f7c9e
## 17:44:27 Searching Annoy index using 1 thread, search_k = 3000
## 17:44:35 Annoy recall = 100%
## 17:44:36 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:44:38 Initializing from normalized Laplacian + noise (using irlba)
## 17:44:41 Commencing optimization for 200 epochs, with 1387376 positive edges
## 17:44:58 Optimization finished
DimPlot(seurat_integrated_filtered_mouse, reduction = "umap", group.by = "Sample_Tag") + ggtitle("UMAP mouse cells by conditions")
metadata_mouse <- seurat_integrated@meta.data
metadata_mouse %>% select(2,26) -> metadata_mouse
table(metadata_mouse)
## Labels
## Xenograft mainly_human mainly_mouse unique_mouse
## HN10621 74 3327 1742
## HN10960 1 106 69
## HN11097_Control 0 576 339
## HN11097_treated 27 650 329
## HN15239A_Alpelisib 84 7730 5038
## HN15239A_Control 29 10127 1500
prop.table(table(metadata_mouse))
## Labels
## Xenograft mainly_human mainly_mouse unique_mouse
## HN10621 2.330855e-03 1.047940e-01 5.486960e-02
## HN10960 3.149805e-05 3.338793e-03 2.173365e-03
## HN11097_Control 0.000000e+00 1.814288e-02 1.067784e-02
## HN11097_treated 8.504473e-04 2.047373e-02 1.036286e-02
## HN15239A_Alpelisib 2.645836e-03 2.434799e-01 1.586872e-01
## HN15239A_Control 9.134434e-04 3.189807e-01 4.724707e-02